import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.io as pio
# Make "notebook" Plotly's default renderer for every later fig.show().
pio.renderers.default = "notebook"
# Load the dataset; the path is relative to the current working directory.
world_data = pd.read_csv('Data/our-world-can.csv')
# Bare expression: shows the frame when this runs as a notebook cell.
world_data
| | Entity | Code | Day | total_tests | 142601-annotations | Total confirmed deaths due to COVID-19 | Total confirmed cases of COVID-19 |
|---|---|---|---|---|---|---|---|
| 0 | 2020 Summer Olympics athletes & staff | NaN | 2021-06-19 | NaN | NaN | NaN | 1.0 |
| 1 | 2020 Summer Olympics athletes & staff | NaN | 2021-06-20 | NaN | NaN | NaN | 1.0 |
| 2 | 2020 Summer Olympics athletes & staff | NaN | 2021-06-21 | NaN | NaN | NaN | 1.0 |
| 3 | 2020 Summer Olympics athletes & staff | NaN | 2021-06-22 | NaN | NaN | NaN | 1.0 |
| 4 | 2020 Summer Olympics athletes & staff | NaN | 2021-06-23 | NaN | NaN | NaN | 1.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 105718 | Zimbabwe | ZWE | 2021-07-19 | NaN | NaN | 2697.0 | 85732.0 |
| 105719 | Zimbabwe | ZWE | 2021-07-20 | NaN | NaN | 2747.0 | 88415.0 |
| 105720 | Zimbabwe | ZWE | 2020-03-20 | NaN | NaN | NaN | 1.0 |
| 105721 | Zimbabwe | ZWE | 2020-03-21 | NaN | NaN | NaN | 3.0 |
| 105722 | Zimbabwe | ZWE | 2020-03-22 | NaN | NaN | NaN | 3.0 |
105723 rows × 7 columns
# Swap the raw source column headers for short, readable labels.
# Columns not listed here (Code, Day) keep their original names.
world_data = world_data.rename(
    columns={
        'Entity': 'Country',
        'total_tests': 'Number of Tests Performed',
        '142601-annotations': 'Tested T/F',
        'Total confirmed deaths due to COVID-19': 'Deaths',
        'Total confirmed cases of COVID-19': 'Cases',
    }
)
world_data
| | Country | Code | Day | Number of Tests Performed | Tested T/F | Deaths | Cases |
|---|---|---|---|---|---|---|---|
| 0 | 2020 Summer Olympics athletes & staff | NaN | 2021-06-19 | NaN | NaN | NaN | 1.0 |
| 1 | 2020 Summer Olympics athletes & staff | NaN | 2021-06-20 | NaN | NaN | NaN | 1.0 |
| 2 | 2020 Summer Olympics athletes & staff | NaN | 2021-06-21 | NaN | NaN | NaN | 1.0 |
| 3 | 2020 Summer Olympics athletes & staff | NaN | 2021-06-22 | NaN | NaN | NaN | 1.0 |
| 4 | 2020 Summer Olympics athletes & staff | NaN | 2021-06-23 | NaN | NaN | NaN | 1.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 105718 | Zimbabwe | ZWE | 2021-07-19 | NaN | NaN | 2697.0 | 85732.0 |
| 105719 | Zimbabwe | ZWE | 2021-07-20 | NaN | NaN | 2747.0 | 88415.0 |
| 105720 | Zimbabwe | ZWE | 2020-03-20 | NaN | NaN | NaN | 1.0 |
| 105721 | Zimbabwe | ZWE | 2020-03-21 | NaN | NaN | NaN | 3.0 |
| 105722 | Zimbabwe | ZWE | 2020-03-22 | NaN | NaN | NaN | 3.0 |
105723 rows × 7 columns
# Pull out the rows for each country by matching on the Country column.
can = world_data.loc[world_data['Country'] == 'Canada']
# NOTE(review): the four labels dropped below are hard-coded row indices tied
# to this exact CSV; presumably they trim the US series to the same length as
# Canada's -- confirm, since they will point at other rows if the file changes.
usa = world_data.loc[world_data['Country'] == 'United States'].drop(
    index=[98765, 98764, 98763, 98762]
)
can
| | Country | Code | Day | Number of Tests Performed | Tested T/F | Deaths | Cases |
|---|---|---|---|---|---|---|---|
| 16800 | Canada | CAN | 2020-01-31 | 0.0 | tests performed | NaN | 4.0 |
| 16801 | Canada | CAN | 2020-02-08 | 63.0 | tests performed | NaN | 7.0 |
| 16802 | Canada | CAN | 2020-02-16 | 109.0 | tests performed | NaN | 7.0 |
| 16803 | Canada | CAN | 2020-02-21 | 166.0 | tests performed | NaN | 9.0 |
| 16804 | Canada | CAN | 2020-02-24 | 212.0 | tests performed | NaN | 10.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 17337 | Canada | CAN | 2020-02-22 | NaN | NaN | NaN | 9.0 |
| 17338 | Canada | CAN | 2020-02-23 | NaN | NaN | NaN | 9.0 |
| 17339 | Canada | CAN | 2020-02-28 | NaN | NaN | NaN | 15.0 |
| 17340 | Canada | CAN | 2020-03-02 | NaN | NaN | NaN | 32.0 |
| 17341 | Canada | CAN | 2020-03-04 | NaN | NaN | NaN | 42.0 |
542 rows × 7 columns
# Bare expression: shows the United States subset as notebook cell output.
usa
| | Country | Code | Day | Number of Tests Performed | Tested T/F | Deaths | Cases |
|---|---|---|---|---|---|---|---|
| 98220 | United States | USA | 2020-03-01 | 348.0 | tests performed | 1.0 | 32.0 |
| 98221 | United States | USA | 2020-03-02 | 861.0 | tests performed | 6.0 | 55.0 |
| 98222 | United States | USA | 2020-03-03 | 1480.0 | tests performed | 7.0 | 74.0 |
| 98223 | United States | USA | 2020-03-04 | 2370.0 | tests performed | 11.0 | 107.0 |
| 98224 | United States | USA | 2020-03-05 | 3587.0 | tests performed | 12.0 | 184.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 98757 | United States | USA | 2020-02-20 | NaN | NaN | NaN | 14.0 |
| 98758 | United States | USA | 2020-02-21 | NaN | NaN | NaN | 16.0 |
| 98759 | United States | USA | 2020-02-22 | NaN | NaN | NaN | 16.0 |
| 98760 | United States | USA | 2020-02-23 | NaN | NaN | NaN | 16.0 |
| 98761 | United States | USA | 2020-02-24 | NaN | NaN | NaN | 16.0 |
542 rows × 7 columns
# Reduce each country's rows to one summed "Cases" value per Day, then turn
# Day back into an ordinary column with reset_index(). The source rows are not
# in date order; the pivoted result comes back ordered by Day.
can_pivot = can.pivot_table(index='Day', values=['Cases'], aggfunc='sum').reset_index()
usa_pivot = usa.pivot_table(index='Day', values=['Cases'], aggfunc='sum').reset_index()
# Tag every row with the country name. A scalar value is broadcast to the
# frame's length, so this does not depend on the table having a fixed number
# of rows (the previous ['Canada']*542 list failed for any other length).
can_pivot.insert(0, 'Country', 'Canada', allow_duplicates=True)
can_pivot
| | Country | Day | Cases |
|---|---|---|---|
| 0 | Canada | 2020-01-26 | 1.0 |
| 1 | Canada | 2020-01-27 | 1.0 |
| 2 | Canada | 2020-01-28 | 2.0 |
| 3 | Canada | 2020-01-29 | 2.0 |
| 4 | Canada | 2020-01-30 | 2.0 |
| ... | ... | ... | ... |
| 537 | Canada | 2021-07-16 | 1430127.0 |
| 538 | Canada | 2021-07-17 | 1430437.0 |
| 539 | Canada | 2021-07-18 | 1430740.0 |
| 540 | Canada | 2021-07-19 | 1431378.0 |
| 541 | Canada | 2021-07-20 | 1431691.0 |
542 rows × 3 columns
# Tag every row with the country name. A scalar value is broadcast to the
# frame's length, so this does not depend on the table having a fixed number
# of rows (the previous ['United States']*542 list failed for any other length).
usa_pivot.insert(0, 'Country', 'United States', allow_duplicates=True)
usa_pivot
| | Country | Day | Cases |
|---|---|---|---|
| 0 | United States | 2020-01-22 | 1.0 |
| 1 | United States | 2020-01-23 | 1.0 |
| 2 | United States | 2020-01-24 | 2.0 |
| 3 | United States | 2020-01-25 | 2.0 |
| 4 | United States | 2020-01-26 | 5.0 |
| ... | ... | ... | ... |
| 537 | United States | 2021-07-16 | 34054952.0 |
| 538 | United States | 2021-07-17 | 34067912.0 |
| 539 | United States | 2021-07-18 | 34079960.0 |
| 540 | United States | 2021-07-19 | 34132071.0 |
| 541 | United States | 2021-07-20 | 34174774.0 |
542 rows × 3 columns
# Stack the two per-country tables into one long frame, US rows first.
# Each part keeps its own 0-based index, so index labels repeat.
added_data = pd.concat(
    objs=[usa_pivot, can_pivot],
)
added_data
| | Country | Day | Cases |
|---|---|---|---|
| 0 | United States | 2020-01-22 | 1.0 |
| 1 | United States | 2020-01-23 | 1.0 |
| 2 | United States | 2020-01-24 | 2.0 |
| 3 | United States | 2020-01-25 | 2.0 |
| 4 | United States | 2020-01-26 | 5.0 |
| ... | ... | ... | ... |
| 537 | Canada | 2021-07-16 | 1430127.0 |
| 538 | Canada | 2021-07-17 | 1430437.0 |
| 539 | Canada | 2021-07-18 | 1430740.0 |
| 540 | Canada | 2021-07-19 | 1431378.0 |
| 541 | Canada | 2021-07-20 | 1431691.0 |
1084 rows × 3 columns
# Build per-capita versions of both tables on copies, leaving the
# absolute-count frames untouched for the earlier charts.
usa_pivot2 = usa_pivot.copy()
can_pivot2 = can_pivot.copy()
# Divide the case counts by a fixed population figure for each country.
# NOTE(review): the two population constants are hard-coded; their source and
# reference year are not stated here -- confirm.
usa_pivot2.insert(3, 'Cases / Population', usa_pivot['Cases'] / 328_200_000, allow_duplicates=True)
can_pivot2.insert(3, 'Cases / Population', can_pivot['Cases'] / 37_590_000, allow_duplicates=True)
# Keep only the ratio so both frames end up with identical columns.
usa_pivot2 = usa_pivot2.drop(columns='Cases')
# Fix: this result used to be bound to the misspelled name `cam_pivot2`, so
# can_pivot2 silently kept its 'Cases' column and no longer matched usa_pivot2.
can_pivot2 = can_pivot2.drop(columns='Cases')
usa_pivot2
| | Country | Day | Cases / Population |
|---|---|---|---|
| 0 | United States | 2020-01-22 | 3.046923e-09 |
| 1 | United States | 2020-01-23 | 3.046923e-09 |
| 2 | United States | 2020-01-24 | 6.093845e-09 |
| 3 | United States | 2020-01-25 | 6.093845e-09 |
| 4 | United States | 2020-01-26 | 1.523461e-08 |
| ... | ... | ... | ... |
| 537 | United States | 2021-07-16 | 1.037628e-01 |
| 538 | United States | 2021-07-17 | 1.038023e-01 |
| 539 | United States | 2021-07-18 | 1.038390e-01 |
| 540 | United States | 2021-07-19 | 1.039978e-01 |
| 541 | United States | 2021-07-20 | 1.041279e-01 |
542 rows × 3 columns
# Bare expression: shows the Canadian per-capita table as notebook cell output.
can_pivot2
| | Country | Day | Cases | Cases / Population |
|---|---|---|---|---|
| 0 | Canada | 2020-01-26 | 1.0 | 2.660282e-08 |
| 1 | Canada | 2020-01-27 | 1.0 | 2.660282e-08 |
| 2 | Canada | 2020-01-28 | 2.0 | 5.320564e-08 |
| 3 | Canada | 2020-01-29 | 2.0 | 5.320564e-08 |
| 4 | Canada | 2020-01-30 | 2.0 | 5.320564e-08 |
| ... | ... | ... | ... | ... |
| 537 | Canada | 2021-07-16 | 1430127.0 | 3.804541e-02 |
| 538 | Canada | 2021-07-17 | 1430437.0 | 3.805366e-02 |
| 539 | Canada | 2021-07-18 | 1430740.0 | 3.806172e-02 |
| 540 | Canada | 2021-07-19 | 1431378.0 | 3.807869e-02 |
| 541 | Canada | 2021-07-20 | 1431691.0 | 3.808702e-02 |
542 rows × 4 columns
# Stack both per-capita tables (US rows first) for the comparison chart.
mod_data = pd.concat(
    objs=[usa_pivot2, can_pivot2],
)
# Line chart of Canada's case counts by day, drawn in red on the dark theme.
fig = px.line(
    can_pivot,
    x="Day",
    y="Cases",
    template="plotly_dark",
    title="Confirmed Cases Over Time in Canada",
    color_discrete_sequence=['Red'],
)
fig.show()
# Line chart of US case counts by day, drawn in blue on the dark theme.
fig = px.line(
    usa_pivot,
    x="Day",
    y="Cases",
    template="plotly_dark",
    title="Confirmed Cases Over Time in the USA",
    color_discrete_sequence=['Blue'],
)
fig.show()
# Both countries' case counts on one chart; one coloured line per Country.
fig = px.line(
    added_data,
    x='Day',
    y='Cases',
    color='Country',
    template="plotly_dark",
    title="Confirmed Cases Over Time in the USA vs Canada",
)
fig.show()
# Cases divided by population for both countries; one coloured line per Country.
fig = px.line(
    mod_data,
    x='Day',
    y='Cases / Population',
    color='Country',
    template="plotly_dark",
    title="Cases / Population in the USA vs Canada",
)
fig.show()